From bc76bb665d0d24147f7031e690c217cee8f68bb2 Mon Sep 17 00:00:00 2001 From: "kaf24@labyrinth.cl.cam.ac.uk" Date: Thu, 26 Aug 2004 13:57:01 +0000 Subject: [PATCH] bitkeeper revision 1.1159.1.106 (412dec2dJdF7UJz-ddgHVVOekKtAHQ) Completed first cut of Xen support for grant tables. The device drivers now need modifying to use them. --- xen/arch/x86/memory.c | 94 +++++++- xen/common/grant_table.c | 243 +++++++++++++++++++-- xen/include/hypervisor-ifs/grant_table.h | 59 ++++- xen/include/hypervisor-ifs/hypervisor-if.h | 19 +- xen/include/xen/grant_table.h | 21 +- 5 files changed, 397 insertions(+), 39 deletions(-) diff --git a/xen/arch/x86/memory.c b/xen/arch/x86/memory.c index d2f58b4cfa..778edf0ee1 100644 --- a/xen/arch/x86/memory.c +++ b/xen/arch/x86/memory.c @@ -443,7 +443,8 @@ get_page_from_l1e( if ( unlikely((count_info & PGC_count_mask) == 0) || unlikely(e == NULL) || unlikely(!get_domain(e)) ) return 0; - rc = gnttab_try_map(e, d, page, l1v & _PAGE_RW); + rc = gnttab_try_map( + e, d, pfn, (l1v & _PAGE_RW) ? GNTTAB_MAP_RW : GNTTAB_MAP_RO); put_domain(e); return rc; } @@ -484,11 +485,12 @@ get_page_from_l2e( static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) { - struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)]; unsigned long l1v = l1_pgentry_val(l1e); + unsigned long pfn = l1_pgentry_to_pagenr(l1e); + struct pfn_info *page = &frame_table[pfn]; struct domain *e = page->u.inuse.domain; - if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(l1v >> PAGE_SHIFT) ) + if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(pfn) ) return; if ( unlikely(e != d) ) @@ -504,7 +506,8 @@ static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) * mappings and which unmappings are counted via the grant entry, but * really it doesn't matter as privileged domains have carte blanche. */ - if ( likely(gnttab_try_unmap(e, d, page, l1v & _PAGE_RW)) ) + if ( likely(gnttab_try_map(e, d, pfn, (l1v & _PAGE_RW) ? 
+ GNTTAB_UNMAP_RW : GNTTAB_UNMAP_RO)) ) return; /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */ } @@ -824,6 +827,7 @@ static int do_extended_command(unsigned long ptr, unsigned long val) struct domain *d = current, *nd, *e; u32 x, y; domid_t domid; + grant_ref_t gntref; switch ( cmd ) { @@ -978,6 +982,88 @@ static int do_extended_command(unsigned long ptr, unsigned long val) } break; + case MMUEXT_TRANSFER_PAGE: /* give a page owned by the caller to domain val[31:16] via a grant reference */ + domid = (domid_t)(val >> 16); + gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF)); /* high byte from val[15:8], low byte from ptr[9:2] */ + + if ( unlikely(IS_XEN_HEAP_FRAME(page)) || + unlikely(!pfn_is_ram(pfn)) || + unlikely((e = find_domain_by_id(domid)) == NULL) ) + { + MEM_LOG("Bad frame (%08lx) or bad domid (%d).\n", pfn, domid); + okay = 0; + break; + } + + spin_lock(&d->page_alloc_lock); + + /* + * The tricky bit: atomically release ownership while there is just one + * benign reference to the page (PGC_allocated). If that reference + * disappears then the deallocation routine will safely spin. + */ + nd = page->u.inuse.domain; + y = page->count_info; + do { + x = y; + if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != + (1|PGC_allocated)) || + unlikely(nd != d) ) + { + MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p," + " caf=%08x, taf=%08x\n", page_to_pfn(page), + d, d->domain, nd, x, page->u.inuse.type_info); + spin_unlock(&d->page_alloc_lock); + put_domain(e); + okay = 0; + break; /* NOTE(review): this break leaves only the enclosing do-while, not the switch, so the unlink and second spin_unlock below still run after the lock and the reference on e were dropped here -- confirm; likely needs an explicit bail-out. */ + } + __asm__ __volatile__( + LOCK_PREFIX "cmpxchg8b %2" + : "=d" (nd), "=a" (y), + "=m" (*(volatile u64 *)(&page->count_info)) + : "0" (d), "1" (x), "c" (NULL), "b" (x) ); + } + while ( unlikely(nd != d) || unlikely(y != x) ); + + /* + * Unlink from 'd'. At least one reference remains (now anonymous), so + * noone else is spinning to try to delete this page from 'd'. + */ + d->tot_pages--; + list_del(&page->list); + + spin_unlock(&d->page_alloc_lock); + + spin_lock(&e->page_alloc_lock); + + /* Check that 'e' will accept the page and has reservation headroom. 
*/ + ASSERT(e->tot_pages <= e->max_pages); + if ( unlikely(e->tot_pages == e->max_pages) || + unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) ) + { + MEM_LOG("Transferee has no reservation headroom (%ld,%ld), or " + "provided a bad grant ref.\n", e->tot_pages, e->max_pages); + spin_unlock(&e->page_alloc_lock); + put_domain(e); + okay = 0; + break; + } + + /* Okay, add the page to 'e'. */ + if ( unlikely(e->tot_pages++ == 0) ) + get_knownalive_domain(e); + list_add_tail(&page->list, &e->page_list); + page->u.inuse.domain = e; + + spin_unlock(&e->page_alloc_lock); + + /* Transfer is all done: tell the guest about its new page frame. */ + gnttab_notify_transfer(e, gntref, pfn); + + put_domain(e); + break; + case MMUEXT_REASSIGN_PAGE: if ( unlikely(!IS_PRIV(d)) ) { diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index f76b18f8a8..aa682048a6 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -73,12 +73,22 @@ gnttab_update_pin_status( grant_ref_t ref; u16 pin_flags; struct domain *ld, *rd; - u16 sflags, prev_sflags; + u16 sflags; active_grant_entry_t *act; grant_entry_t *sha; long rc = 0; unsigned long frame; + /* + * We bound the number of times we retry CMPXCHG on memory locations + * that we share with a guest OS. The reason is that the guest can modify + * that location at a higher rate than we can read-modify-CMPXCHG, so + * the guest could cause us to livelock. There are a few cases + * where it is valid for the guest to race our updates (e.g., to change + * the GTF_readonly flag), so we allow a few retries before failing. + */ + int retries = 0; + ld = current; /* Bitwise-OR avoids short-circuiting which screws control flow. */ @@ -127,7 +137,7 @@ gnttab_update_pin_status( for ( ; ; ) { - u32 scombo, prev_scombo; + u32 scombo, prev_scombo, new_scombo; if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) || unlikely(sdom != ld->domain) ) @@ -135,29 +145,34 @@ gnttab_update_pin_status( "Bad flags (%x) or dom (%d). 
(NB. expected dom %d)\n", sflags, sdom, ld->domain); - sflags |= GTF_reading; + /* Merge two 16-bit values into a 32-bit combined update. */ + /* NB. Endianness! */ + prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; + + new_scombo = scombo | GTF_reading; if ( !(pin_flags & GNTPIN_readonly) ) { - sflags |= GTF_writing; + new_scombo |= GTF_writing; if ( unlikely(sflags & GTF_readonly) ) PIN_FAIL(EINVAL, "Attempt to write-pin a r/o grant entry.\n"); } - /* Merge two 16-bit values into a 32-bit combined update. */ - /* NB. Endianness! */ - prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; - - /* NB. prev_sflags is updated in place to seen value. */ - if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, - prev_scombo | GTF_writing)) ) + /* NB. prev_scombo is updated in place to seen value. */ + if ( unlikely(cmpxchg_user((u32 *)&sha->flags, + prev_scombo, + new_scombo)) ) PIN_FAIL(EINVAL, "Fault while modifying shared flags and domid.\n"); /* Did the combined update work (did we see what we expected?). */ - if ( prev_scombo == scombo ) + if ( likely(prev_scombo == scombo) ) break; + if ( retries++ == 4 ) + PIN_FAIL(EINVAL, + "Shared grant entry is unstable.\n"); + /* Didn't see what we expected. Split out the seen flags & dom. */ /* NB. Endianness! */ sflags = (u16)prev_scombo; @@ -243,10 +258,12 @@ gnttab_update_pin_status( else if ( act->status & GNTPIN_readonly ) { sflags = sha->flags; - do { - prev_sflags = sflags; - if ( unlikely(prev_sflags & GTF_readonly) ) + for ( ; ; ) + { + u16 prev_sflags; + + if ( unlikely(sflags & GTF_readonly) ) PIN_FAIL(EINVAL, "Attempt to write-pin a r/o grant entry.\n"); @@ -255,13 +272,23 @@ gnttab_update_pin_status( PIN_FAIL(EINVAL, "Attempt to write-pin a unwritable page.\n"); + prev_sflags = sflags; + /* NB. prev_sflags is updated in place to seen value. 
*/ if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, prev_sflags | GTF_writing)) ) PIN_FAIL(EINVAL, "Fault while modifying shared flags.\n"); + + if ( likely(prev_sflags == sflags) ) + break; + + if ( retries++ == 4 ) + PIN_FAIL(EINVAL, + "Shared grant entry is unstable.\n"); + + sflags = prev_sflags; } - while ( prev_sflags != sflags ); } /* Update status word -- this includes device accessibility. */ @@ -281,6 +308,51 @@ gnttab_update_pin_status( return rc; } +static long +gnttab_setup_table( + gnttab_setup_table_t *uop) +{ + gnttab_setup_table_t op; + struct domain *d; + + if ( unlikely(__copy_from_user(&op, uop, sizeof(op)) != 0) ) + { + DPRINTK("Fault while reading gnttab_setup_table_t.\n"); + return -EFAULT; + } + + if ( unlikely(op.nr_frames > 1) ) + { + DPRINTK("Xen only supports one grant-table frame per domain.\n"); + return -EINVAL; + } + + if ( op.dom == DOMID_SELF ) + op.dom = current->domain; + + if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) ) + { + DPRINTK("Bad domid %d.\n", op.dom); + return -ESRCH; + } + + if ( op.nr_frames == 1 ) + { + ASSERT(d->grant_table != NULL); + + if ( unlikely(put_user(virt_to_phys(d->grant_table) >> PAGE_SHIFT, + &op.frame_list[0])) ) + { + DPRINTK("Fault while writing frame list.\n"); + put_domain(d); + return -EFAULT; + } + } + + put_domain(d); + return 0; +} + long do_grant_table_op( gnttab_op_t *uop) @@ -297,6 +369,9 @@ do_grant_table_op( case GNTTABOP_update_pin_status: rc = gnttab_update_pin_status(&uop->u.update_pin_status); break; + case GNTTABOP_setup_table: + rc = gnttab_setup_table(&uop->u.setup_table); + break; default: rc = -ENOSYS; break; @@ -307,18 +382,146 @@ do_grant_table_op( int gnttab_try_map( - struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly) + struct domain *rd, struct domain *ld, unsigned long frame, int op) { + grant_table_t *t; + active_grant_entry_t *a; + u16 *ph, h; + + if ( unlikely((t = rd->grant_table) == NULL) ) + return 0; + + spin_lock(&t->lock); + + 
ph = &t->maphash[GNT_MAPHASH(frame)]; /* head of the hash chain for this frame */ + while ( (h = *ph) != GNT_MAPHASH_INVALID ) + { + if ( (a = &t->active[*ph])->frame == frame ) /* stop at the active entry that maps this frame */ + goto found; + ph = &a->next; + } + + fail: + spin_unlock(&t->lock); return 0; + + found: + if ( !(a->status & GNTPIN_host_accessible) ) + goto fail; + + switch ( op ) /* adjust the read or write map count held in a->status */ + { + case GNTTAB_MAP_RO: + if ( (a->status & GNTPIN_rmap_mask) == GNTPIN_rmap_mask ) + goto fail; + a->status += 1 << GNTPIN_rmap_shift; + break; + + case GNTTAB_MAP_RW: + if ( (a->status & GNTPIN_wmap_mask) == GNTPIN_wmap_mask ) + goto fail; + a->status += 1 << GNTPIN_wmap_shift; + break; + + case GNTTAB_UNMAP_RO: + if ( (a->status & GNTPIN_rmap_mask) == 0 ) + goto fail; + a->status -= 1 << GNTPIN_rmap_shift; + break; + + case GNTTAB_UNMAP_RW: + if ( (a->status & GNTPIN_wmap_mask) == 0 ) + goto fail; + a->status -= 1 << GNTPIN_wmap_shift; + break; + + default: + BUG(); + } + + spin_unlock(&t->lock); + return 1; } -int -gnttab_try_unmap( - struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly) +int +gnttab_prepare_for_transfer( + struct domain *rd, struct domain *ld, grant_ref_t ref) { + grant_table_t *t; + grant_entry_t *e; + domid_t sdom; + u16 sflags; + u32 scombo, prev_scombo; + int retries = 0; + + if ( unlikely((t = rd->grant_table) == NULL) || + unlikely(ref >= NR_GRANT_ENTRIES) ) + { + DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", rd->domain, ref); + return 0; + } + + spin_lock(&t->lock); + + e = &t->shared[ref]; + + sflags = e->flags; + sdom = e->domid; + + for ( ; ; ) + { + if ( unlikely(sflags != GTF_accept_transfer) || + unlikely(sdom != ld->domain) ) + { + DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", + sflags, sdom, ld->domain); + goto fail; + } + + /* Merge two 16-bit values into a 32-bit combined update. */ + /* NB. Endianness! */ + prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; + + /* NB. prev_scombo is updated in place to seen value. 
*/ + if ( unlikely(cmpxchg_user((u32 *)&e->flags, prev_scombo, + prev_scombo | GTF_transfer_committed)) ) + { + DPRINTK("Fault while modifying shared flags and domid.\n"); + goto fail; + } + + /* Did the combined update work (did we see what we expected?). */ + if ( likely(prev_scombo == scombo) ) + break; + + if ( retries++ == 4 ) + { + DPRINTK("Shared grant entry is unstable.\n"); + goto fail; + } + + /* Didn't see what we expected. Split out the seen flags & dom. */ + /* NB. Endianness! */ + sflags = (u16)prev_scombo; + sdom = (u16)(prev_scombo >> 16); + } + + spin_unlock(&t->lock); + return 1; + + fail: + spin_unlock(&t->lock); return 0; } +void +gnttab_notify_transfer( + struct domain *rd, grant_ref_t ref, unsigned long frame) +{ + wmb(); /* Ensure that the reassignment is globally visible. */ + rd->grant_table->shared[ref].frame = frame; +} + int grant_table_create( struct domain *d) diff --git a/xen/include/hypervisor-ifs/grant_table.h b/xen/include/hypervisor-ifs/grant_table.h index 97513cce60..17fb28d6a0 100644 --- a/xen/include/hypervisor-ifs/grant_table.h +++ b/xen/include/hypervisor-ifs/grant_table.h @@ -28,18 +28,32 @@ * 3. Write memory barrier (WMB). * 4. Write ent->flags, inc. valid type. * - * Removing an unused GTF_permit_access entry: + * Invalidating an unused GTF_permit_access entry: * 1. flags = ent->flags. * 2. Observe that !(flags & (GTF_reading|GTF_writing)). * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). * NB. No need for WMB as reuse of entry is control-dependent on success of * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). * - * Removing an unused GTF_accept_transfer entry: - * 1. Check result of SMP-safe CMPXCHG(&ent->frame, 0, ). - * 2. 
Clear ent->flags. - * 3. WMB (ordering of step 2 vs. steps 1,2 of introducing a new entry). - * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing non-zero value in ent->frame). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for ent->frame != 0. + * * Changing a GTF_permit_access from writable to read-only: * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. * @@ -90,6 +104,18 @@ typedef struct { #define _GTF_writing (4) #define GTF_writing (1<<_GTF_writing) +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until the address of the + * transferred frame is written into the entry. + * NB. It is safe for the guest to spin-wait on the frame address: + * Xen will always write the frame address in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1<<_GTF_transfer_committed) + /*********************************** * GRANT TABLE QUERIES AND USES @@ -124,11 +150,32 @@ typedef struct { MEMORY_PADDING; } PACKED gnttab_update_pin_status_t; /* 16 bytes */ +/* + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least + * <nr_frames> pages. The frame addresses are written to the <frame_list>. + * Only <nr_frames> addresses are written, even if the table is larger. 
+ * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + * 3. Xen may not support more than a single grant-table page per domain. + */ +#define GNTTABOP_setup_table 1 +typedef struct { + /* IN parameters. */ + domid_t dom; /* 0 */ + u16 nr_frames; /* 2 */ + u32 __pad; + /* OUT parameters. */ + unsigned long *frame_list; /* 8 */ + MEMORY_PADDING; +} PACKED gnttab_setup_table_t; /* 16 bytes */ + typedef struct { u32 cmd; /* GNTTABOP_* */ /* 0 */ u32 __reserved; /* 4 */ union { /* 8 */ gnttab_update_pin_status_t update_pin_status; + gnttab_setup_table_t setup_table; u8 __dummy[16]; } PACKED u; } PACKED gnttab_op_t; /* 24 bytes */ diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index 53417604a4..3a0189a655 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -121,17 +121,23 @@ * ptr[:2] -- Linear address of LDT base (NB. must be page-aligned). * val[:8] -- Number of entries in LDT. * + * val[7:0] == MMUEXT_TRANSFER_PAGE: + * val[31:16] -- Domain to whom page is to be transferred. + * (val[15:8],ptr[9:2]) -- 16-bit reference into transferee's grant table. + * ptr[:12] -- Page frame to be reassigned to the FD. + * (NB. The frame must currently belong to the calling domain). + * * val[7:0] == MMUEXT_SET_FOREIGNDOM: - * val[31:15] -- Domain to set as the Foreign Domain (FD). + * val[31:16] -- Domain to set as the Foreign Domain (FD). * (NB. DOMID_SELF is not recognised) * If FD != DOMID_IO then the caller must be privileged. * + * val[7:0] == MMUEXT_CLEAR_FOREIGNDOM: + * Clears the FD. + * * val[7:0] == MMUEXT_REASSIGN_PAGE: * ptr[:2] -- A machine address within the page to be reassigned to the FD. * (NB. page must currently belong to the calling domain). - * - * val[7:0] == MMUEXT_CLEAR_FOREIGNDOM: - * Clears the FD. */ #define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. 
*/ #define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */ @@ -145,9 +151,10 @@ #define MMUEXT_TLB_FLUSH 6 /* ptr = NULL */ #define MMUEXT_INVLPG 7 /* ptr = VA to invalidate */ #define MMUEXT_SET_LDT 8 /* ptr = VA of table; val = # entries */ -#define MMUEXT_SET_FOREIGNDOM 9 /* val[31:15] = dom */ -#define MMUEXT_REASSIGN_PAGE 10 +#define MMUEXT_TRANSFER_PAGE 9 /* ptr = MA of frame; val[31:16] = dom */ +#define MMUEXT_SET_FOREIGNDOM 10 /* val[31:16] = dom */ #define MMUEXT_CLEAR_FOREIGNDOM 11 +#define MMUEXT_REASSIGN_PAGE 12 #define MMUEXT_CMD_MASK 255 #define MMUEXT_CMD_SHIFT 8 diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h index 395959323c..52c7e828d7 100644 --- a/xen/include/xen/grant_table.h +++ b/xen/include/xen/grant_table.h @@ -77,9 +77,24 @@ void grant_table_destroy( struct domain *d); /* Create/destroy host-CPU mappings via a grant-table entry. */ +#define GNTTAB_MAP_RO 0 +#define GNTTAB_MAP_RW 1 +#define GNTTAB_UNMAP_RO 2 +#define GNTTAB_UNMAP_RW 3 int gnttab_try_map( - struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly); -int gnttab_try_unmap( - struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly); + struct domain *rd, struct domain *ld, unsigned long frame, int op); + +/* + * Check that the given grant reference (rd,ref) allows 'ld' to transfer + * ownership of a page frame. If so, lock down the grant entry. + */ +int +gnttab_prepare_for_transfer( + struct domain *rd, struct domain *ld, grant_ref_t ref); + +/* Notify 'rd' of a completed transfer via an already-locked grant entry. */ +void +gnttab_notify_transfer( + struct domain *rd, grant_ref_t ref, unsigned long frame); #endif /* __XEN_GRANT_H__ */ -- 2.30.2